import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the iris measurements from the CSV in the working directory and
# echo the frame so the columns and row count can be checked by eye.
iris_csv = "iris.csv"
df = pd.read_csv(iris_csv)
print(df)
sepal.length sepal.width petal.length petal.width variety 0 5.1 3.5 1.4 0.2 Setosa 1 4.9 3.0 1.4 0.2 Setosa 2 4.7 3.2 1.3 0.2 Setosa 3 4.6 3.1 1.5 0.2 Setosa 4 5.0 3.6 1.4 0.2 Setosa .. ... ... ... ... ... 145 6.7 3.0 5.2 2.3 Virginica 146 6.3 2.5 5.0 1.9 Virginica 147 6.5 3.0 5.2 2.0 Virginica 148 6.2 3.4 5.4 2.3 Virginica 149 5.9 3.0 5.1 1.8 Virginica [150 rows x 5 columns]
from sklearn.preprocessing import LabelEncoder

# The four measurement columns are the model inputs; "variety" is the target.
feature_columns = [
    'sepal.length',
    'sepal.width',
    'petal.length',
    'petal.width',
]
X = df[feature_columns].values

# Map the variety names to integer class ids. `le` is kept around because it
# holds the id -> name mapping needed to decode predictions later.
le = LabelEncoder()
y = le.fit_transform(df['variety'].values)
print(y)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
# Box plots of every numeric feature, grouped by variety.
# DataFrame.boxplot(by=...) builds its own figure when no `ax` is passed, so
# calling plt.figure() first only left an empty extra figure behind.
df.boxplot(by="variety", figsize=(15, 10))
plt.show()
<Figure size 432x288 with 0 Axes>
import plotly.express as px

# NOTE(review): "variety" is a categorical column, so a box plot with it on
# the value axis carries little information -- confirm which numeric column
# was intended here.
fig = px.box(df, y="variety")
fig.show()

# Sepal width against sepal length, one colour per variety, with every
# individual observation drawn next to its box.
fig = px.box(
    df,
    x="sepal.length",
    y="sepal.width",
    points="all",
    color="variety",
)
fig.show()

# Pairwise scatter plots of the four measurements, coloured by variety.
fig = px.scatter_matrix(
    df,
    dimensions=["sepal.width", "sepal.length", "petal.width", "petal.length"],
    color="variety",
)
fig.show()

# plotly ships its own copy of the iris data with different column names
# (sepal_length, species, ...). Bind it to a separate name so the frame loaded
# from iris.csv is not silently replaced under `df`.
iris_px = px.data.iris()
fig = px.scatter_3d(
    iris_px,
    x='sepal_length',
    y='sepal_width',
    z='petal_width',
    color='petal_length',
    symbol='species',
)
fig.show()
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Nearest-neighbour classifier that looks at a single neighbour.
# fit() returns the estimator itself, so construction and fitting can chain.
classifier = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Predicted class ids for the held-out samples.
y_pred = classifier.predict(X_test)
print(y_pred)
[2 1 0 2 0 2 0 1 1 1 2 1 1 1 1 0 1 1 0 0 2 1 0 0 2 0 0 1 1 0]
# Entry [i, j] counts test samples whose true class is i and predicted class
# is j. The result is stored under its own name: rebinding `confusion_matrix`
# would shadow the imported function and make this cell fail when re-run.
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)
[[11 0 0] [ 0 13 0] [ 0 0 6]]
# Share of held-out samples classified correctly, expressed as a percentage.
accuracy = 100 * accuracy_score(y_test, y_pred)
print(f'Accuracy of the model:{round(accuracy, 2)} %.')
Accuracy of the model:100.0 %.
from sklearn.metrics import f1_score, precision_score, recall_score

# All three scores use the same averaging mode: the per-class score is
# computed first and the classes are then averaged with equal weight.
averaging = 'macro'

# Precision
prec = precision_score(y_test, y_pred, average=averaging)
print("Precision:", prec)

# Recall
rec = recall_score(y_test, y_pred, average=averaging)
print("Recall:", rec)

# F1-score
f1 = f1_score(y_test, y_pred, average=averaging)
print("F1-score:", f1)
Precision: 1.0 Recall: 1.0 F1-score: 1.0
from sklearn.neighbors import KNeighborsClassifier
import joblib

# Train the model that gets persisted.
# NOTE(review): this is a fresh KNeighborsClassifier with default settings,
# not the n_neighbors=1 `classifier` that was evaluated above -- confirm
# which of the two is meant to be shipped.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Save the model to disk.
filename = 'trained_model.pkl'
joblib.dump(knn, filename)

# Save the label classes exactly as the encoder learned them from the
# training labels. Refitting `le` on a hand-typed list of names would throw
# that mapping away and only match the training encoding if the typed names
# happened to sort in the same order as the labels in the data.
filename_classes = "label_classes.pkl"
joblib.dump(le.classes_, filename_classes)
['label_classes.pkl']
# Restore the persisted classifier. joblib.load unpickles the file, so it
# should only ever be pointed at artefacts from a trusted source.
model = joblib.load("trained_model.pkl")

# Rebuild the label encoder from the saved class names instead of refitting.
le = LabelEncoder()
le.classes_ = joblib.load("label_classes.pkl")

# A single sample to classify.
# NOTE(review): presumably in the same column order as the training features
# (sepal length, sepal width, petal length, petal width) -- confirm.
new_data = [[5.8, 4.5, 6.4, 1.2]]

# Predict the class id and decode it straight back to the species name.
predictions = le.inverse_transform(model.predict(new_data))
print(predictions)
['virginica']